library(dada2)
packageVersion("dada2") # check dada2 version
## [1] '1.22.0'
library(Biostrings)
library(ShortRead)
library(seqTools) # per base sequence content
library(phyloseq)
library(ggplot2)
library(data.table)
library(plyr)
library(dplyr)
library(qckitfastq) # per base sequence content
library(stringr)
# ROOT DIRECTORY (to modify on your computer)
path.root <- "~/Projects/MetaIBS"
path.nagel <- file.path(path.root, "scripts/analysis-individual/Nagel-2016")
path.data <- file.path(path.root, "data/analysis-individual/Nagel-2016")
First, we import the fastq files containing the raw reads. The samples were kindly shared by the author.
# Save the path to the directory containing the fastq zipped files
path.fastq <- file.path(path.data, "raw_fastq")
# list.files(path.fastq) # check we are in the right directory
# fastq filenames have format: SAMPLENAME.fastq
# Saves the whole directory path to each file name
FNs <- sort(list.files(path.fastq, pattern=".fastq", full.names = TRUE))
show(FNs[1:5])
# Extract sample names, assuming filenames have format: SAMPLENAME.fastq
sample.names <- sapply(strsplit(basename(FNs), "_"), `[`, 1)
show(sample.names) # saves only the file name (without the path)
# After demultiplexing, sanity check of read length
seqlen.tab <- table(width(readFastq(FNs[1])))
plot(x=as.integer(names(seqlen.tab)), y=log10(as.integer(seqlen.tab)))
table(width(readFastq(FNs[1]))==0) # some reads are of length 0 => need to remove them
Some reads are of length 0. To avoid having issues with looking at the quality profile of the samples, we will first remove all reads below 50bp.
# Place filtered reads in a original_trim/ subdirectory
trim_samples <- file.path(path.data, "raw_fastq_trimmed", paste0(sample.names, "_trim.fastq.gz"))
names(trim_samples) <- sample.names # assign names
# Remove reads with less than 50bp
trim <- filterAndTrim(fwd = FNs, filt = trim_samples,
truncQ=0, # default truncQ is 2
minLen = 50, # Discard reads shorter than 50 bp.
compress=TRUE,
verbose=TRUE)
# ~ 85-98% sequences filtered
Now we can look at the quality profile of the samples.
# Look at quality of all files
for (i in 1:4){ # 1:length(trim_samples)
show(plotQualityProfile(trim_samples[i]))
}
# Look at nb of reads per sample
# raw_stats <- data.frame('sample' = sample.names,
# 'reads' = fastqq(trim_samples)@nReads)
# min(raw_stats$reads)
# max(raw_stats$reads)
# mean(raw_stats$reads)
We will have a quick peak at the per base sequence content of the reads in some samples, to make sure there is no anomaly.
# Look at per base sequence content
fseq <- seqTools::fastqq(trim_samples[1])
## [fastqq] File ( 1/1) '/Users/enigma/Projects/MetaIBS/data/analysis-individual/Nagel-2016/raw_fastq_trimmed/DN1_trim.fastq.gz' done.
rc <- read_content(fseq)
plot_read_content(rc) + labs(title = "Per base sequence content")
plot_read_content(rc) + xlim(0,50) + labs(title = "Per base sequence content")
Now, we will look whether the reads still contain the primers. Primer sequences are given in the methods section of the paper.
FWD <- "GTGCCAGCMGCCGCGGTAA" # 515F primer sequence
REV <- "GGACTACHVGGGTWTCTAAT" # 806R primer sequence
# Function that, from the primer sequence, will return all combinations possible (complement, reverse complement, ...)
allOrients <- function(primer) {
# Create all orientations of the input sequence
require(Biostrings)
dna <- DNAString(primer) # The Biostrings works w/ DNAString objects rather than character vectors
orients <- c(Forward = dna, Complement = complement(dna), Reverse = reverse(dna),
RevComp = reverseComplement(dna))
return(sapply(orients, toString))
}
# Get all combinations of the primer sequences
FWD.orients <- allOrients(FWD) # 515F
REV.orients <- allOrients(REV) # 806R
FWD.orients # sanity check
## Forward Complement Reverse
## "GTGCCAGCMGCCGCGGTAA" "CACGGTCGKCGGCGCCATT" "AATGGCGCCGMCGACCGTG"
## RevComp
## "TTACCGCGGCKGCTGGCAC"
REV.orients
## Forward Complement Reverse
## "GGACTACHVGGGTWTCTAAT" "CCTGATGDBCCCAWAGATTA" "TAATCTWTGGGVHCATCAGG"
## RevComp
## "ATTAGAWACCCBDGTAGTCC"
# Function that counts number of reads in which a sequence is found
primerHits <- function(primer, fn) {
nhits <- vcountPattern(primer, sread(readFastq(fn)), fixed = FALSE, max.mismatch = 2)
return(sum(nhits > 0))
}
# Look in all samples if we find the primers
for (i in 1:5){
cat("SAMPLE", sample.names[i], "with total number of", raw_stats[i,'reads'], "reads\n\n")
x <- rbind(ForwardPrimer = sapply(FWD.orients, primerHits, fn = trim_samples[[i]]),
ReversePrimer = sapply(REV.orients, primerHits, fn = trim_samples[[i]]))
print(x)
cat("\n____________________________________________\n\n")
}
## SAMPLE DN1 with total number of 31424 reads
##
## Forward Complement Reverse RevComp
## ForwardPrimer 21239 0 0 0
## ReversePrimer 0 0 0 14432
##
## ____________________________________________
##
## SAMPLE DN10 with total number of 33687 reads
##
## Forward Complement Reverse RevComp
## ForwardPrimer 27440 0 0 0
## ReversePrimer 0 0 0 18160
##
## ____________________________________________
##
## SAMPLE DN11 with total number of 41542 reads
##
## Forward Complement Reverse RevComp
## ForwardPrimer 39768 0 0 0
## ReversePrimer 0 0 0 24986
##
## ____________________________________________
##
## SAMPLE DN12 with total number of 40803 reads
##
## Forward Complement Reverse RevComp
## ForwardPrimer 38960 0 0 0
## ReversePrimer 0 0 0 25900
##
## ____________________________________________
##
## SAMPLE DN13 with total number of 41982 reads
##
## Forward Complement Reverse RevComp
## ForwardPrimer 40741 0 0 0
## ReversePrimer 0 0 0 29953
##
## ____________________________________________
Let’s have a quick look at where primers are positioned in the reads
# Function that gets position in which sequence is found
primerHitsPosition <- function(primer, fn){
hits <- as.data.frame(vmatchPattern(primer, sread(readFastq(fn)), fixed = FALSE, max.mismatch = 2))
hits <- hits[,c("group", "start")]
colnames(hits) <- c("sample", "start")
hits$sample <- sapply(strsplit(basename(fn), "_"), `[`, 1)
hits$readslength <- seqTools::fastqq(fn)@maxSeqLen
return(hits)
}
# Get position of FWD primers
FWDpos <- data.frame()
for(i in 1:5){ # length(trim_samples)
cat("SAMPLE", i)
new <- primerHitsPosition(FWD.orients["Forward"], trim_samples[[i]])
FWDpos <- rbind(new, FWDpos)
}
## SAMPLE 1[fastqq] File ( 1/1) '/Users/enigma/Projects/MetaIBS/data/analysis-individual/Nagel-2016/raw_fastq_trimmed/DN1_trim.fastq.gz' done.
## SAMPLE 2[fastqq] File ( 1/1) '/Users/enigma/Projects/MetaIBS/data/analysis-individual/Nagel-2016/raw_fastq_trimmed/DN10_trim.fastq.gz' done.
## SAMPLE 3[fastqq] File ( 1/1) '/Users/enigma/Projects/MetaIBS/data/analysis-individual/Nagel-2016/raw_fastq_trimmed/DN11_trim.fastq.gz' done.
## SAMPLE 4[fastqq] File ( 1/1) '/Users/enigma/Projects/MetaIBS/data/analysis-individual/Nagel-2016/raw_fastq_trimmed/DN12_trim.fastq.gz' done.
## SAMPLE 5[fastqq] File ( 1/1) '/Users/enigma/Projects/MetaIBS/data/analysis-individual/Nagel-2016/raw_fastq_trimmed/DN13_trim.fastq.gz' done.
ggplot(FWDpos, aes(x=start))+
geom_density(aes(y=..scaled..)) +
xlim(c(0,max(FWDpos$readslength)))+
labs(x="start position of FWD primer", y="proportion of primers starting at x position")
# Get position of REV primers
REVpos <- data.frame()
for(i in 1:5){ # length(trim_samples)
cat("SAMPLE", i)
new <- primerHitsPosition(REV.orients["RevComp"], trim_samples[[i]])
REVpos <- rbind(new, REVpos)
}
## SAMPLE 1[fastqq] File ( 1/1) '/Users/enigma/Projects/MetaIBS/data/analysis-individual/Nagel-2016/raw_fastq_trimmed/DN1_trim.fastq.gz' done.
## SAMPLE 2[fastqq] File ( 1/1) '/Users/enigma/Projects/MetaIBS/data/analysis-individual/Nagel-2016/raw_fastq_trimmed/DN10_trim.fastq.gz' done.
## SAMPLE 3[fastqq] File ( 1/1) '/Users/enigma/Projects/MetaIBS/data/analysis-individual/Nagel-2016/raw_fastq_trimmed/DN11_trim.fastq.gz' done.
## SAMPLE 4[fastqq] File ( 1/1) '/Users/enigma/Projects/MetaIBS/data/analysis-individual/Nagel-2016/raw_fastq_trimmed/DN12_trim.fastq.gz' done.
## SAMPLE 5[fastqq] File ( 1/1) '/Users/enigma/Projects/MetaIBS/data/analysis-individual/Nagel-2016/raw_fastq_trimmed/DN13_trim.fastq.gz' done.
ggplot(REVpos, aes(x=start))+
geom_density(aes(y=..scaled..)) +
xlim(c(0,max(REVpos$readslength)))+
labs(x="start position of REV primer", y="proportion of primers starting at x position")
The reads indeed contain both primers (although only about 60% of reads have reverse primer). We will keep only reads containing both primers, and trim the primers. It seems like the reverse primer is found around 300bp, which is consistent with the length of the variable region.
# KEEP READS WITH PRIMER AND REMOVE PRIMER
# Place filtered files in a filtered/ subdirectory
filt1_samples <- file.path(path.data, "filtered1", paste0(sample.names, "_filt1.fastq.gz"))
# Assign names for the filtered fastq.gz files
names(filt1_samples) <- sample.names
# Filter first to take out primer
out1 <- removePrimers(fn = trim_samples, fout = filt1_samples,
primer.fwd = FWD.orients['Forward'],
primer.rev = REV.orients['RevComp'],
trim.fwd = TRUE,
trim.rev = TRUE,
orient = FALSE, # keep the reads in their original orientation
compress = TRUE, verbose = TRUE)
# Primer removal
out1[1:4,]
## reads.in reads.out
## DN1_trim.fastq.gz 31424 10193
## DN10_trim.fastq.gz 33687 15175
## DN11_trim.fastq.gz 41542 24197
## DN12_trim.fastq.gz 40803 24942
# Quality profile after primer removal
for (i in 1:4){
show(plotQualityProfile(filt1_samples[i]))
}
Then, we perform a quality filtering of the reads.
# Place filtered files in a filtered/ subdirectory
filt2_samples <- file.path(path.data, "filtered2", paste0(sample.names, "_filt2.fastq.gz"))
# Assign names for the filtered fastq.gz files
names(filt2_samples) <- sample.names
# Filter
out2 <- filterAndTrim(fwd = filt1_samples, filt = filt2_samples,
maxEE=3, # reads with more than 3 expected errors (sum(10e(-Q/10))) are discarded
truncQ=10, # Truncate reads at the first instance of a quality score less than or equal to truncQ.
minLen = 150, # Discard reads shorter than 150 bp.
compress=TRUE,
multithread=TRUE,
verbose=TRUE)
Let’s look at the output filtered fastq files as sanity check.
out2[1:4,] # show how many reads were filtered in each file
## reads.in reads.out
## DN1_filt1.fastq.gz 10193 8087
## DN10_filt1.fastq.gz 15175 12070
## DN11_filt1.fastq.gz 24197 20773
## DN12_filt1.fastq.gz 24942 20882
# Look at quality profile of filtered files
for (i in 1:4){
show(plotQualityProfile(filt2_samples[i]))
}
Now we will build the parametric error model, to be able to infer amplicon sequence variants (ASVs) later on.
set.seed(123)
err <- learnErrors(filt2_samples, multithread=TRUE, randomize=TRUE, verbose = 1)
The error rates for each possible transition (A→C, A→G, …) are shown. Points are the observed error rates for each consensus quality score. The black line shows the estimated error rates after convergence of the machine-learning algorithm. The red line shows the error rates expected under the nominal definition of the Q-score.
plotErrors(err, nominalQ = TRUE)
As the error rate model doesn’t fit well the observations for high quality scores, we will manually modify it to better fit.
# Modify error rate model
err_new <- err
# G2T
err_new$err_out['G2T','35':'38'] <- 0.0016
err_new$err_out['G2T','38'] <- 0.0014
err_new$err_out['G2T','39'] <- 0.0013
err_new$err_out['G2T','40'] <- 0.00115
err_new$err_out['G2T','41'] <- 0.0010
err_new$err_out['G2T','42'] <- 0.00085
err_new$err_out['G2T','43'] <- 0.0007
err_new$err_out['G2T','44'] <- 0.00055
err_new$err_out['G2T','45'] <- 0.0004
# All the other transitions/transversions
err_new$err_out['A2C','35':'46'] <- 0.0008
err_new$err_out['A2G','38':'46'] <- 0.007
err_new$err_out['A2T','37':'46'] <- 0.002
err_new$err_out['C2A','37':'46'] <- 0.0008
err_new$err_out['C2G','39':'46'] <- 0.0025
err_new$err_out['C2T','38':'46'] <- 0.007
err_new$err_out['G2A','36':'46'] <- 0.0035
err_new$err_out['G2C','36':'46'] <- 0.0006
err_new$err_out['T2A','38':'46'] <- 0.0025
err_new$err_out['T2C','36':'46'] <- 0.0065
err_new$err_out['T2G','36':'46'] <- 0.0025
# Check modified error rate model
plotErrors(err_new, nominalQ = TRUE)
The dada() algorithm infers sequence variants based on estimated errors (previous step). Firstly, we de-replicate the reads in each sample, to reduce the computation time. De-replication is a common step in almost all modern ASV inference (or OTU picking) pipelines, but a unique feature of derepFastq is that it maintains a summary of the quality information for each dereplicated sequence in $quals.
# Prepare empty vector for the infered sequences
seq_infered <- vector("list", length(sample.names))
names(seq_infered) <- sample.names # names of each row will be the sample names
# Iterate through the 30 samples
for(sampl in sample.names) {
cat("Processing:", sampl, "\n")
derep <- derepFastq(filt2_samples[[sampl]]) # dereplicate the reads in the sample
seq_infered[[sampl]] <- dada(derep, err=err_new, multithread=TRUE, # default parameters
HOMOPOLYMER_GAP_PENALTY=-1, BAND_SIZE=32) # parameters for Ion Torrent recommended
}
# Inspect the infered sequence variants from sample 1:5
for (i in 1:5){
print(seq_infered[[i]])
print("________________")
}
## dada-class: object describing DADA2 denoising results
## 72 sequence variants were inferred from 3780 input unique sequences.
## Key parameters: OMEGA_A = 1e-40, OMEGA_C = 1e-40, BAND_SIZE = 32
## [1] "________________"
## dada-class: object describing DADA2 denoising results
## 151 sequence variants were inferred from 6122 input unique sequences.
## Key parameters: OMEGA_A = 1e-40, OMEGA_C = 1e-40, BAND_SIZE = 32
## [1] "________________"
## dada-class: object describing DADA2 denoising results
## 147 sequence variants were inferred from 7911 input unique sequences.
## Key parameters: OMEGA_A = 1e-40, OMEGA_C = 1e-40, BAND_SIZE = 32
## [1] "________________"
## dada-class: object describing DADA2 denoising results
## 189 sequence variants were inferred from 8768 input unique sequences.
## Key parameters: OMEGA_A = 1e-40, OMEGA_C = 1e-40, BAND_SIZE = 32
## [1] "________________"
## dada-class: object describing DADA2 denoising results
## 153 sequence variants were inferred from 8981 input unique sequences.
## Key parameters: OMEGA_A = 1e-40, OMEGA_C = 1e-40, BAND_SIZE = 32
## [1] "________________"
We can now construct an amplicon sequence variant table (ASV) table, a higher-resolution version of the OTU table produced by traditional methods.
# Make sequence table from the infered sequence variants
seqtable <- makeSequenceTable(seq_infered)
# We should have 30 samples (30 rows)
dim(seqtable)
## [1] 30 1323
# Inspect distribution of sequence lengths
hist(nchar(getSequences(seqtable)), breaks = 100, xlab = "ASV length", ylab = "Number of ASVs", main="")
The core dada method corrects substitution and indel errors, but chimeras remain. Fortunately, the accuracy of sequence variants after denoising makes identifying chimeric ASVs simpler than when dealing with fuzzy OTUs. Chimeric sequences are identified if they can be exactly reconstructed by combining a left-segment and a right-segment from two more abundant “parent” sequences.
seqtable.nochim <- removeBimeraDenovo(seqtable, method="consensus", multithread=TRUE, verbose=TRUE)
# Check how many sequence variants we have after removing chimeras
dim(seqtable.nochim)
## [1] 30 1092
# Check how many reads we have after removing chimeras (we should keep the vast majority of the reads, like > 80%)
sum(seqtable.nochim)/sum(seqtable)
## [1] 0.9758994
Sanity check before assigning taxonomy.
# Function that counts nb of reads
getN <- function(x) sum(getUniques(x))
# Table that will count number of reads for each process of interest (input reads, filtered reads, denoised reads, non chimera reads)
track <- cbind(out1,
out2[,2],
sapply(seq_infered, getN),
rowSums(seqtable.nochim),
lapply(rowSums(seqtable.nochim)*100/out1[,1], as.integer))
# Assign column and row names
colnames(track) <- c("input", "primer-filt", "quality-filt", "denoised", "nonchim", "%input->output")
rownames(track) <- sample.names
# Show final table: for each row/sample, we have shown the initial number of reads, filtered reads, denoised reads, and non chimera reads
track
## input primer-filt quality-filt denoised nonchim %input->output
## DN1 31424 10193 8087 7890 7819 24
## DN10 33687 15175 12070 11850 11828 35
## DN11 41542 24197 20773 20577 20456 49
## DN12 40803 24942 20882 20471 19964 48
## DN13 41982 29227 24514 24266 24226 57
## DN14 42424 24666 20429 20296 18959 44
## DN15 37990 21802 17476 17252 17149 45
## DN2 35202 13678 11247 11056 10923 31
## DN3 35974 16216 13518 13341 13171 36
## DN4 38369 17413 13694 13519 13379 34
## DN5 37350 17392 14787 14672 14421 38
## DN6 37916 18004 15668 15340 14836 39
## DN7 31704 14910 12296 12188 12188 38
## DN8 18838 8114 6496 6416 6373 33
## DN9 41887 18156 14161 14034 13909 33
## HN1 52361 28770 24065 23848 22807 43
## HN10 40024 27368 24634 24144 23917 59
## HN11 37893 23372 21245 21042 20940 55
## HN12 48034 26205 22944 22782 21863 45
## HN13 50183 28786 25741 25476 21896 43
## HN14 50573 26442 20949 20748 20287 40
## HN15 45578 28139 23727 23468 22289 48
## HN2 42337 24663 17150 17071 17053 40
## HN3 43063 25912 23412 23096 22428 52
## HN4 44553 24358 17246 16941 16624 37
## HN5 41154 25714 22446 22386 22341 54
## HN6 34736 11573 6682 6501 6496 18
## HN7 46448 28456 25118 24741 23963 51
## HN8 43991 22984 20239 20093 20087 45
## HN9 44593 24897 21503 21412 21385 47
Extensions: The dada2 package also implements a method to make species level assignments based on exact matching between ASVs and sequenced reference strains. Recent analysis suggests that exact matching (or 100% identity) is the only appropriate way to assign species to 16S gene fragments. Currently, species-assignment training fastas are available for the Silva and RDP 16S databases. To follow the optional species addition step, download the silva_species_assignment_v132.fa.gz file, and place it in the directory with the fastq files.
path.silva <- file.path(path.root, "data/analysis-individual/CLUSTER/taxonomy/silva-taxonomic-ref")
# Assign taxonomy (with silva v138)
set.seed(123)
taxa <- assignTaxonomy(seqtable.nochim, file.path(path.silva, "silva_nr99_v138.1_train_set.fa.gz"),
tryRC = TRUE, # try reverse complement of the sequences
multithread=TRUE, verbose = TRUE)
# Add species assignment
set.seed(123)
taxa <- addSpecies(taxa, file.path(path.silva, "silva_species_assignment_v138.1.fa.gz"))
# Check how the taxonomy table looks like
taxa.print <- taxa
rownames(taxa.print) <- NULL # Removing sequence rownames for display only
head(taxa.print)
## Kingdom Phylum Class Order
## [1,] "Bacteria" "Bacteroidota" "Bacteroidia" "Bacteroidales"
## [2,] "Bacteria" "Firmicutes" "Clostridia" "Lachnospirales"
## [3,] "Bacteria" "Bacteroidota" "Bacteroidia" "Bacteroidales"
## [4,] "Bacteria" "Firmicutes" "Clostridia" "Lachnospirales"
## [5,] "Bacteria" "Firmicutes" "Clostridia" "Oscillospirales"
## [6,] "Bacteria" "Bacteroidota" "Bacteroidia" "Bacteroidales"
## Family Genus Species
## [1,] "Bacteroidaceae" "Bacteroides" "vulgatus"
## [2,] "Lachnospiraceae" "Blautia" NA
## [3,] "Bacteroidaceae" "Bacteroides" NA
## [4,] "Lachnospiraceae" "Agathobacter" NA
## [5,] "Ruminococcaceae" "Faecalibacterium" NA
## [6,] "Bacteroidaceae" "Bacteroides" "uniformis"
table(taxa.print[,1], useNA="ifany") # Show the different kingdoms (should be only bacteria)
##
## Archaea Bacteria
## 3 1089
table(taxa.print[,2], useNA="ifany") # Show the different phyla
##
## Actinobacteriota Bacteroidota Cyanobacteria Desulfobacterota
## 34 205 6 12
## Elusimicrobiota Euryarchaeota Firmicutes Fusobacteriota
## 1 2 782 5
## Proteobacteria Synergistota Thermoplasmatota Verrucomicrobiota
## 33 3 1 8
We will remove any sample with less than 500 reads from further analysis, and also any ASVs with unassigned phyla.
The preprocessing will be easier to do with ASV, taxonomic and metadata tables combined in a phyloseq object.
#_________________________
# Import metadata
metadata_table <- read.table(file.path(path.data, "00_Metadata-Nagel/Metadata-Nagel.csv"), header = TRUE, sep = ",", row.names=1)
head(metadata_table)
#_________________________
# Create phyloseq object
physeq <- phyloseq(otu_table(seqtable.nochim, taxa_are_rows=FALSE), # by default, in otu_table the sequence variants are in rows
sample_data(metadata_table),
tax_table(taxa))
# Remove taxa that are eukaryota, or have unassigned Phyla
physeq <- subset_taxa(physeq, Kingdom != "Eukaryota")
physeq <- subset_taxa(physeq, !is.na(Phylum))
# Remove samples with less than 500 reads
physeq <- prune_samples(sample_sums(physeq)>=500, physeq)
# No sample was deleted, so we don't need to remove taxa present in low-count samples
# physeq <- prune_taxa(taxa_sums(physeq)>0, physeq)
# Absolute abundance
# plot_bar(physeq, fill = "Phylum")+ facet_wrap("host_disease", scales="free_x") + theme(axis.text.x = element_blank())
# Relative abundance for Phylum
phylum.table <- physeq %>%
tax_glom(taxrank = "Phylum") %>% # agglomerate at phylum level
transform_sample_counts(function(x) {x/sum(x)} ) %>% # Transform to rel. abundance
psmelt() # Melt to long format
ggplot(phylum.table, aes(x = Sample, y = Abundance, fill = Phylum))+
facet_wrap(~ host_disease, scales = "free") + # scales = "free" removes empty lines
geom_bar(stat = "identity") +
theme(axis.text.x = element_text(size = 5, angle = -90))+
labs(x = "Samples", y = "Relative abundance")
# Save to disk
saveRDS(raw_stats, file.path(path.data, "01_Dada2-Nagel/raw_stats.rds"))
saveRDS(out1, file.path(path.data, "01_Dada2-Nagel/out1.rds"))
saveRDS(out2, file.path(path.data, "01_Dada2-Nagel/out2.rds"))
saveRDS(err, file.path(path.data, "01_Dada2-Nagel/errorRates.rds"))
saveRDS(err_new, file.path(path.data, "01_Dada2-Nagel/errorRates_modif.rds"))
saveRDS(seq_infered, file.path(path.data, "01_Dada2-Nagel/seq_infered.rds"))
# Taxa & Phyloseq object
saveRDS(taxa, file.path(path.data, "01_Dada2-Nagel/taxa_nagel.rds"))
saveRDS(physeq, file.path(path.root, "data/analysis-individual/CLUSTER/PhyloTree/input/physeq_nagel.rds"))
saveRDS(physeq, file.path(path.root, "data/phyloseq-objects/phyloseq-without-phylotree/physeq_nagel.rds"))
sessionInfo()
## R version 4.1.3 (2022-03-10)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur/Monterey 10.16
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats4 stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] stringr_1.5.0 qckitfastq_1.10.0
## [3] dplyr_1.1.1 plyr_1.8.8
## [5] data.table_1.14.8 ggplot2_3.4.2
## [7] phyloseq_1.38.0 seqTools_1.28.0
## [9] zlibbioc_1.40.0 ShortRead_1.52.0
## [11] GenomicAlignments_1.30.0 SummarizedExperiment_1.24.0
## [13] Biobase_2.54.0 MatrixGenerics_1.6.0
## [15] matrixStats_0.63.0 Rsamtools_2.10.0
## [17] GenomicRanges_1.46.1 BiocParallel_1.28.3
## [19] Biostrings_2.62.0 GenomeInfoDb_1.30.1
## [21] XVector_0.34.0 IRanges_2.28.0
## [23] S4Vectors_0.32.4 BiocGenerics_0.40.0
## [25] dada2_1.22.0 Rcpp_1.0.10
##
## loaded via a namespace (and not attached):
## [1] nlme_3.1-162 bitops_1.0-7 RColorBrewer_1.1-3
## [4] tools_4.1.3 bslib_0.4.2 utf8_1.2.3
## [7] R6_2.5.1 vegan_2.6-4 mgcv_1.8-42
## [10] DBI_1.1.3 colorspace_2.1-0 permute_0.9-7
## [13] rhdf5filters_1.6.0 ade4_1.7-22 withr_2.5.0
## [16] tidyselect_1.2.0 compiler_4.1.3 cli_3.6.1
## [19] DelayedArray_0.20.0 labeling_0.4.2 sass_0.4.5
## [22] scales_1.2.1 RSeqAn_1.14.0 digest_0.6.31
## [25] rmarkdown_2.21 jpeg_0.1-10 pkgconfig_2.0.3
## [28] htmltools_0.5.5 highr_0.10 fastmap_1.1.1
## [31] rlang_1.1.0 rstudioapi_0.14 farver_2.1.1
## [34] jquerylib_0.1.4 generics_0.1.3 hwriter_1.3.2.1
## [37] jsonlite_1.8.4 RCurl_1.98-1.12 magrittr_2.0.3
## [40] GenomeInfoDbData_1.2.7 biomformat_1.22.0 interp_1.1-4
## [43] Matrix_1.5-1 munsell_0.5.0 Rhdf5lib_1.16.0
## [46] fansi_1.0.4 ape_5.7-1 lifecycle_1.0.3
## [49] stringi_1.7.12 yaml_2.3.7 MASS_7.3-58.3
## [52] rhdf5_2.38.1 grid_4.1.3 parallel_4.1.3
## [55] crayon_1.5.2 deldir_1.0-6 lattice_0.20-45
## [58] splines_4.1.3 multtest_2.50.0 knitr_1.42
## [61] pillar_1.9.0 igraph_1.4.2 reshape2_1.4.4
## [64] codetools_0.2-19 glue_1.6.2 evaluate_0.20
## [67] latticeExtra_0.6-30 RcppParallel_5.1.7 png_0.1-8
## [70] vctrs_0.6.1 foreach_1.5.2 gtable_0.3.3
## [73] cachem_1.0.7 xfun_0.38 survival_3.5-5
## [76] tibble_3.2.1 iterators_1.0.14 cluster_2.1.4